// initialized by arch/ia64/setup.c:find_initrd()
unsigned long initrd_start = 0, initrd_end = 0;
+/*
+ * True when address a lies in the inclusive range
+ * [d->xen_vastart, d->xen_vaend] of domain d.
+ * Both arguments are parenthesized so that expression arguments
+ * (e.g. "p + off", "&dom") expand with the intended precedence.
+ * Note that a is evaluated twice; do not pass an expression with side effects.
+ */
+#define IS_XEN_ADDRESS(d,a) (((a) >= (d)->xen_vastart) && ((a) <= (d)->xen_vaend))
+
extern int loadelfimage(char *);
extern int readelfimage_base_and_size(char *, unsigned long,
- unsigned long *, unsigned long *, unsigned long *);
+ unsigned long *, unsigned long *, unsigned long *);
unsigned long map_domain_page0(struct domain *);
extern unsigned long dom_fw_setup(struct domain *, char *, int);
/* this belongs in include/asm, but there doesn't seem to be a suitable place */
void free_perdomain_pt(struct domain *d)
{
- dummy();
- //free_page((unsigned long)d->mm.perdomain_pt);
+ dummy();
+ //free_page((unsigned long)d->mm.perdomain_pt);
}
int hlt_counter;
void disable_hlt(void)
{
- hlt_counter++;
+ hlt_counter++;
}
void enable_hlt(void)
{
- hlt_counter--;
+ hlt_counter--;
}
static void default_idle(void)
{
- if ( hlt_counter == 0 )
- {
+ if ( hlt_counter == 0 )
+ {
local_irq_disable();
- if ( !softirq_pending(smp_processor_id()) )
- safe_halt();
- //else
+ if ( !softirq_pending(smp_processor_id()) )
+ safe_halt();
+ //else
local_irq_enable();
- }
+ }
}
void continue_cpu_idle_loop(void)
{
- int cpu = smp_processor_id();
- for ( ; ; )
- {
+ int cpu = smp_processor_id();
+ for ( ; ; )
+ {
#ifdef IA64
// __IRQ_STAT(cpu, idle_timestamp) = jiffies
#else
- irq_stat[cpu].idle_timestamp = jiffies;
+ irq_stat[cpu].idle_timestamp = jiffies;
#endif
- while ( !softirq_pending(cpu) )
- default_idle();
- do_softirq();
- }
+ while ( !softirq_pending(cpu) )
+ default_idle();
+ do_softirq();
+ }
}
void startup_cpu_idle_loop(void)
{
- /* Just some sanity to ensure that the scheduler is set up okay. */
- ASSERT(current->domain == IDLE_DOMAIN_ID);
- domain_unpause_by_systemcontroller(current->domain);
- __enter_scheduler();
-
- /*
- * Declares CPU setup done to the boot processor.
- * Therefore memory barrier to ensure state is visible.
- */
- smp_mb();
- init_idle();
+ /* Just some sanity to ensure that the scheduler is set up okay. */
+ ASSERT(current->domain == IDLE_DOMAIN_ID);
+ domain_unpause_by_systemcontroller(current->domain);
+ __enter_scheduler();
+
+ /*
+ * Declares CPU setup done to the boot processor.
+ * Therefore memory barrier to ensure state is visible.
+ */
+ smp_mb();
+ init_idle();
#if 0
//do we have to ensure the idle task has a shared page so that, for example,
//region registers can be loaded from it. Apparently not...
}
#endif
- continue_cpu_idle_loop();
+ continue_cpu_idle_loop();
}
struct domain *arch_alloc_domain_struct(void)
// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
// and linux/arch/ia64/kernel/process.c:kernel_thread()
void new_thread(struct exec_domain *ed,
- unsigned long start_pc,
- unsigned long start_stack,
- unsigned long start_info)
+ unsigned long start_pc,
+ unsigned long start_stack,
+ unsigned long start_info)
{
struct domain *d = ed->domain;
struct switch_stack *sw;
return (IS_ELF(*ehdr));
}
+/*
+ * Copy size bytes from src to dst.
+ * A source for which IS_XEN_ADDRESS(dom0, src) holds is copied with
+ * memcpy(); every other source goes through __copy_from_user(), and a
+ * nonzero result from that call is only reported on the console.
+ */
+static void copy_memory(void *dst, void *src, int size)
+{
+    /* Anything outside dom0's Xen range takes the user-copy path. */
+    if (!IS_XEN_ADDRESS(dom0,src)) {
+        if (__copy_from_user(dst,src,size) != 0)
+            printf("incomplete user copy\n");
+        return;
+    }
+
+    memcpy(dst,src,size);
+}
+
void loaddomainelfimage(struct domain *d, unsigned long image_start)
{
- char *elfbase = image_start;
- Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
- Elf_Phdr *phdr;
- int h, filesz, memsz, paddr;
- unsigned long elfaddr, dom_mpaddr, dom_imva;
- struct page *p;
+ char *elfbase = image_start;
+ //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start;
+ Elf_Ehdr ehdr;
+ Elf_Phdr phdr;
+ int h, filesz, memsz, paddr;
+ unsigned long elfaddr, dom_mpaddr, dom_imva;
+ struct page *p;
- for ( h = 0; h < ehdr->e_phnum; h++ ) {
- phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
- //if ( !is_loadable_phdr(phdr) )
- if ((phdr->p_type != PT_LOAD)) {
- continue;
- }
- filesz = phdr->p_filesz; memsz = phdr->p_memsz;
- elfaddr = elfbase + phdr->p_offset;
- dom_mpaddr = phdr->p_paddr;
+ copy_memory(&ehdr,image_start,sizeof(Elf_Ehdr));
+ for ( h = 0; h < ehdr.e_phnum; h++ ) {
+ copy_memory(&phdr,elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize),
+ sizeof(Elf_Phdr));
+ //if ( !is_loadable_phdr(phdr) )
+ if ((phdr.p_type != PT_LOAD)) {
+ continue;
+ }
+ filesz = phdr.p_filesz; memsz = phdr.p_memsz;
+ elfaddr = elfbase + phdr.p_offset;
+ dom_mpaddr = phdr.p_paddr;
//printf("p_offset: %x, size=%x\n",elfaddr,filesz);
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
if (d == dom0) {
while(1);
}
dom_imva = __va(dom_mpaddr + dom0_start);
- memcpy(dom_imva,elfaddr,filesz);
+ copy_memory(dom_imva,elfaddr,filesz);
if (memsz > filesz) memset(dom_imva+filesz,0,memsz-filesz);
//FIXME: This test for code seems to find a lot more than objdump -x does
- if (phdr->p_flags & PF_X) privify_memory(dom_imva,filesz);
+ if (phdr.p_flags & PF_X) privify_memory(dom_imva,filesz);
}
else
#endif
dom_imva = __va(page_to_phys(p));
if (filesz > 0) {
if (filesz >= PAGE_SIZE)
- memcpy(dom_imva,elfaddr,PAGE_SIZE);
+ copy_memory(dom_imva,elfaddr,PAGE_SIZE);
else { // copy partial page, zero the rest of page
- memcpy(dom_imva,elfaddr,filesz);
+ copy_memory(dom_imva,elfaddr,filesz);
memset(dom_imva+filesz,0,PAGE_SIZE-filesz);
}
//FIXME: This test for code seems to find a lot more than objdump -x does
- if (phdr->p_flags & PF_X)
+ if (phdr.p_flags & PF_X)
privify_memory(dom_imva,PAGE_SIZE);
}
else if (memsz > 0) // always zero out entire page
memsz -= PAGE_SIZE; filesz -= PAGE_SIZE;
elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE;
}
- }
+ }
+}
+
+/*
+ * Sanity-check the ELF header of the image at elfbase and report its
+ * entry point.
+ *
+ *   elfbase: start of the image; the header is fetched via copy_memory().
+ *   elfsize: size of the image, used to bound the header tables.
+ *   entry:   receives ehdr.e_entry on success.
+ *
+ * Returns 0 on success, or -EINVAL when the image cannot hold an ELF
+ * header, elf_sanity_check() rejects the header, the program-header or
+ * section-header table extends past elfsize, or e_shstrndx is SHN_UNDEF.
+ */
+int
+parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry)
+{
+    Elf_Ehdr ehdr;
+    unsigned long table_bytes;
+
+    /* Do not read a header out of an image too small to contain one. */
+    if ( elfsize < sizeof(Elf_Ehdr) )
+    {
+        printk("ELF image is too small to hold an ELF header.\n");
+        return -EINVAL;
+    }
+
+    copy_memory(&ehdr,elfbase,sizeof(Elf_Ehdr));
+
+    if ( !elf_sanity_check(&ehdr) ) {
+        printk("ELF sanity check failed.\n");
+        return -EINVAL;
+    }
+
+    /*
+     * Bounds checks are written as "offset > size || bytes > size - offset"
+     * and the product is widened to unsigned long first, so neither the
+     * multiplication nor the addition can overflow and slip past the test.
+     */
+    table_bytes = (unsigned long)ehdr.e_phnum * ehdr.e_phentsize;
+    if ( (ehdr.e_phoff > elfsize) || (table_bytes > (elfsize - ehdr.e_phoff)) )
+    {
+        printk("ELF program headers extend beyond end of image.\n");
+        return -EINVAL;
+    }
+
+    table_bytes = (unsigned long)ehdr.e_shnum * ehdr.e_shentsize;
+    if ( (ehdr.e_shoff > elfsize) || (table_bytes > (elfsize - ehdr.e_shoff)) )
+    {
+        printk("ELF section headers extend beyond end of image.\n");
+        return -EINVAL;
+    }
+
+    /* Only the presence of a section-header strings table is required. */
+    if ( ehdr.e_shstrndx == SHN_UNDEF )
+    {
+        printk("ELF image has no section-header strings table (shstrtab).\n");
+        return -EINVAL;
+    }
+
+    *entry = ehdr.e_entry;
+
+    return 0;
}
void alloc_dom0(void)
{
#ifdef CONFIG_DOMAIN0_CONTIGUOUS
- if (platform_is_hp_ski()) {
+ if (platform_is_hp_ski()) {
dom0_size = 128*1024*1024; //FIXME: Should be configurable
- }
- printf("alloc_dom0: starting (initializing %d MB...)\n",dom0_size/(1024*1024));
- dom0_start = __alloc_bootmem(dom0_size,dom0_align,__pa(MAX_DMA_ADDRESS));
- if (!dom0_start) {
+ }
+ printf("alloc_dom0: starting (initializing %d MB...)\n",dom0_size/(1024*1024));
+ dom0_start = __alloc_bootmem(dom0_size,dom0_align,__pa(MAX_DMA_ADDRESS));
+ if (!dom0_start) {
printf("construct_dom0: can't allocate contiguous memory size=%p\n",
dom0_size);
while(1);
- }
- printf("alloc_dom0: dom0_start=%p\n",dom0_start);
+ }
+ printf("alloc_dom0: dom0_start=%p\n",dom0_start);
#else
- dom0_start = 0;
+ dom0_start = 0;
#endif
}
int construct_dom0(struct domain *d,
- unsigned long image_start, unsigned long image_len,
- unsigned long initrd_start, unsigned long initrd_len,
- char *cmdline)
-{
- char *dst;
- int i, rc;
- unsigned long pfn, mfn;
- unsigned long nr_pt_pages;
- unsigned long count;
- //l2_pgentry_t *l2tab, *l2start;
- //l1_pgentry_t *l1tab = NULL, *l1start = NULL;
- struct pfn_info *page = NULL;
- start_info_t *si;
- struct exec_domain *ed = d->exec_domain[0];
-
- struct domain_setup_info dsi;
- unsigned long p_start;
- unsigned long pkern_start;
- unsigned long pkern_entry;
- unsigned long pkern_end;
-
- extern void physdev_init_dom0(struct domain *);
+ unsigned long image_start, unsigned long image_len,
+ unsigned long initrd_start, unsigned long initrd_len,
+ char *cmdline)
+{
+ char *dst;
+ int i, rc;
+ unsigned long pfn, mfn;
+ unsigned long nr_pt_pages;
+ unsigned long count;
+ //l2_pgentry_t *l2tab, *l2start;
+ //l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+ struct pfn_info *page = NULL;
+ start_info_t *si;
+ struct exec_domain *ed = d->exec_domain[0];
+
+ struct domain_setup_info dsi;
+ unsigned long p_start;
+ unsigned long pkern_start;
+ unsigned long pkern_entry;
+ unsigned long pkern_end;
+
+ extern void physdev_init_dom0(struct domain *);
//printf("construct_dom0: starting\n");
- /* Sanity! */
+ /* Sanity! */
#ifndef CLONE_DOMAIN0
- if ( d != dom0 )
- BUG();
- if ( test_bit(DF_CONSTRUCTED, &d->d_flags) )
- BUG();
+ if ( d != dom0 )
+ BUG();
+ if ( test_bit(DF_CONSTRUCTED, &d->d_flags) )
+ BUG();
#endif
- memset(&dsi, 0, sizeof(struct domain_setup_info));
+ memset(&dsi, 0, sizeof(struct domain_setup_info));
- printk("*** LOADING DOMAIN 0 ***\n");
+ printk("*** LOADING DOMAIN 0 ***\n");
d->max_pages = dom0_size/PAGE_SIZE;
image_start = __va(ia64_boot_param->initrd_start);
//printk("First word of image: %lx\n",*(unsigned long *)image_start);
//printf("construct_dom0: about to call parseelfimage\n");
- rc = parseelfimage(image_start, image_len, &dsi);
- if ( rc != 0 )
- return rc;
+ rc = parseelfimage(image_start, image_len, &dsi);
+ if ( rc != 0 )
+ return rc;
- p_start = dsi.v_start;
- pkern_start = dsi.v_kernstart;
- pkern_end = dsi.v_kernend;
- pkern_entry = dsi.v_kernentry;
+ p_start = dsi.v_start;
+ pkern_start = dsi.v_kernstart;
+ pkern_end = dsi.v_kernend;
+ pkern_entry = dsi.v_kernentry;
//printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry);
- if ( (p_start & (PAGE_SIZE-1)) != 0 )
- {
- printk("Initial guest OS must load to a page boundary.\n");
- return -EINVAL;
- }
+ if ( (p_start & (PAGE_SIZE-1)) != 0 )
+ {
+ printk("Initial guest OS must load to a page boundary.\n");
+ return -EINVAL;
+ }
- printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
- " Kernel image: %lx->%lx\n"
- " Entry address: %lx\n"
- " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
- pkern_start, pkern_end, pkern_entry);
+ printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
+ " Kernel image: %lx->%lx\n"
+ " Entry address: %lx\n"
+ " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
+ pkern_start, pkern_end, pkern_entry);
- if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
- {
- printk("Initial guest OS requires too much space\n"
- "(%luMB is greater than %luMB limit)\n",
- (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
- return -ENOMEM;
- }
+ if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
+ {
+ printk("Initial guest OS requires too much space\n"
+ "(%luMB is greater than %luMB limit)\n",
+ (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
+ return -ENOMEM;
+ }
- // if high 3 bits of pkern start are non-zero, error
+ // if high 3 bits of pkern start are non-zero, error
- // if pkern end is after end of metaphysical memory, error
- // (we should be able to deal with this... later)
+ // if pkern end is after end of metaphysical memory, error
+ // (we should be able to deal with this... later)
- //
+ //
#if 0
- strcpy(d->name,"Domain0");
+ strcpy(d->name,"Domain0");
#endif
// prepare domain0 pagetable (maps METAphysical to physical)
// following is roughly mm_init() in linux/kernel/fork.c
d->arch.mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
if (unlikely(!d->arch.mm)) {
- printk("Can't allocate mm_struct for domain0\n");
- return -ENOMEM;
+ printk("Can't allocate mm_struct for domain0\n");
+ return -ENOMEM;
}
memset(d->arch.mm, 0, sizeof(*d->arch.mm));
d->arch.mm->pgd = pgd_alloc(d->arch.mm);
if (unlikely(!d->arch.mm->pgd)) {
- printk("Can't allocate pgd for domain0\n");
- return -ENOMEM;
+ printk("Can't allocate pgd for domain0\n");
+ return -ENOMEM;
}
- /* Mask all upcalls... */
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+ /* Mask all upcalls... */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
- /* Copy the OS image. */
- //(void)loadelfimage(image_start);
+ /* Copy the OS image. */
+ //(void)loadelfimage(image_start);
loaddomainelfimage(d,image_start);
- /* Copy the initial ramdisk. */
- //if ( initrd_len != 0 )
- // memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+ /* Copy the initial ramdisk. */
+ //if ( initrd_len != 0 )
+ // memcpy((void *)vinitrd_start, initrd_start, initrd_len);
#if 0
- /* Set up start info area. */
- //si = (start_info_t *)vstartinfo_start;
- memset(si, 0, PAGE_SIZE);
- si->nr_pages = d->tot_pages;
- si->shared_info = virt_to_phys(d->shared_info);
- si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
- //si->pt_base = vpt_start;
- //si->nr_pt_frames = nr_pt_pages;
- //si->mfn_list = vphysmap_start;
-
- if ( initrd_len != 0 )
- {
- //si->mod_start = vinitrd_start;
- si->mod_len = initrd_len;
- printk("Initrd len 0x%lx, start at 0x%08lx\n",
- si->mod_len, si->mod_start);
- }
-
- dst = si->cmd_line;
- if ( cmdline != NULL )
- {
- for ( i = 0; i < 255; i++ )
- {
- if ( cmdline[i] == '\0' )
- break;
- *dst++ = cmdline[i];
- }
- }
- *dst = '\0';
-
- zap_low_mappings(); /* Do the same for the idle page tables. */
+ /* Set up start info area. */
+ //si = (start_info_t *)vstartinfo_start;
+ memset(si, 0, PAGE_SIZE);
+ si->nr_pages = d->tot_pages;
+ si->shared_info = virt_to_phys(d->shared_info);
+ si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
+ //si->pt_base = vpt_start;
+ //si->nr_pt_frames = nr_pt_pages;
+ //si->mfn_list = vphysmap_start;
+
+ if ( initrd_len != 0 )
+ {
+ //si->mod_start = vinitrd_start;
+ si->mod_len = initrd_len;
+ printk("Initrd len 0x%lx, start at 0x%08lx\n",
+ si->mod_len, si->mod_start);
+ }
+
+ dst = si->cmd_line;
+ if ( cmdline != NULL )
+ {
+ for ( i = 0; i < 255; i++ )
+ {
+ if ( cmdline[i] == '\0' )
+ break;
+ *dst++ = cmdline[i];
+ }
+ }
+ *dst = '\0';
+
+ zap_low_mappings(); /* Do the same for the idle page tables. */
#endif
-
- /* Give up the VGA console if DOM0 is configured to grab it. */
+
+ /* Give up the VGA console if DOM0 is configured to grab it. */
#ifdef IA64
if (cmdline != NULL)
#endif
- console_endboot(strstr(cmdline, "tty0") != NULL);
+ console_endboot(strstr(cmdline, "tty0") != NULL);
- /* DOM0 gets access to everything. */
+ /* DOM0 gets access to everything. */
#ifdef CLONE_DOMAIN0
if (d == dom0)
#endif
- physdev_init_dom0(d);
+ physdev_init_dom0(d);
- set_bit(DF_CONSTRUCTED, &d->d_flags);
+ set_bit(DF_CONSTRUCTED, &d->d_flags);
- new_thread(ed, pkern_entry, 0, 0);
- // FIXME: Hack for keyboard input
+ new_thread(ed, pkern_entry, 0, 0);
+ // FIXME: Hack for keyboard input
#ifdef CLONE_DOMAIN0
if (d == dom0)
#endif
- serial_input_init();
- if (d == dom0) {
- ed->vcpu_info->arch.delivery_mask[0] = -1L;
- ed->vcpu_info->arch.delivery_mask[1] = -1L;
- ed->vcpu_info->arch.delivery_mask[2] = -1L;
- ed->vcpu_info->arch.delivery_mask[3] = -1L;
- }
- else __set_bit(0x30,ed->vcpu_info->arch.delivery_mask);
-
- return 0;
+ serial_input_init();
+ if (d == dom0) {
+ ed->vcpu_info->arch.delivery_mask[0] = -1L;
+ ed->vcpu_info->arch.delivery_mask[1] = -1L;
+ ed->vcpu_info->arch.delivery_mask[2] = -1L;
+ ed->vcpu_info->arch.delivery_mask[3] = -1L;
+ }
+ else __set_bit(0x30,ed->vcpu_info->arch.delivery_mask);
+
+ return 0;
+}
+
+// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
+/*
+ * Build domain d from the ELF image at image_start.
+ *
+ * Steps: validate the image header (parsedomainelfimage), allocate and
+ * zero the domain's mm and its pgd, mask event-channel upcalls on every
+ * virtual CPU, load the image (loaddomainelfimage), mark the domain
+ * DF_CONSTRUCTED, and set up exec_domain[0] to start at the image's
+ * entry point.
+ *
+ * initrd_start, initrd_len and cmdline are accepted but not used here.
+ *
+ * Returns 0 on success, the nonzero result of parsedomainelfimage() if
+ * the image is rejected, or -ENOMEM if the mm or pgd cannot be allocated.
+ * BUG()s if the domain is already marked DF_CONSTRUCTED.
+ */
+int construct_domN(struct domain *d,
+                   unsigned long image_start, unsigned long image_len,
+                   unsigned long initrd_start, unsigned long initrd_len,
+                   char *cmdline)
+{
+    int i, rc;
+    struct exec_domain *ed = d->exec_domain[0];
+    unsigned long pkern_entry;
+
+    if ( test_bit(DF_CONSTRUCTED, &d->d_flags) ) BUG();
+
+    printk("*** LOADING DOMAIN %d ***\n",d->id);
+
+    d->max_pages = dom0_size/PAGE_SIZE; // FIXME: use dom0 size
+    // FIXME: use domain0 command line
+    /* image_start is an address held in an integer; convert it explicitly. */
+    rc = parsedomainelfimage((char *)image_start, image_len, &pkern_entry);
+    printk("parsedomainelfimage returns %d\n",rc);
+    if ( rc != 0 ) return rc;
+
+    d->arch.mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
+    if (unlikely(!d->arch.mm)) {
+        printk("Can't allocate mm_struct for domain %d\n",d->id);
+        return -ENOMEM;
+    }
+    memset(d->arch.mm, 0, sizeof(*d->arch.mm));
+    d->arch.mm->pgd = pgd_alloc(d->arch.mm);
+    if (unlikely(!d->arch.mm->pgd)) {
+        /* NOTE(review): d->arch.mm stays allocated on this path -- confirm
+         * that domain teardown releases it. */
+        printk("Can't allocate pgd for domain %d\n",d->id);
+        return -ENOMEM;
+    }
+
+
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+    /* Copy the OS image. */
+    printk("calling loaddomainelfimage\n");
+    loaddomainelfimage(d,image_start);
+    printk("loaddomainelfimage returns\n");
+
+    set_bit(DF_CONSTRUCTED, &d->d_flags);
+
+    printk("calling new_thread\n");
+    new_thread(ed, pkern_entry, 0, 0);
+    printk("new_thread returns\n");
+    /* Sets bit 0x30 in the delivery mask, as construct_dom0 does for
+     * domains other than dom0. */
+    __set_bit(0x30,ed->vcpu_info->arch.delivery_mask);
+
+    return 0;
+}
+
+// FIXME: When dom0 can construct domains, this goes away (or is rewritten)
+/*
+ * Create a new domain, construct it from the image at start/len, and
+ * unpause it.
+ *
+ * Returns 0 on success, 1 if do_createdomain() fails, 2 if
+ * construct_domN() fails.
+ *
+ * NOTE(review): initrd_start, initrd_len and cmdline are not forwarded;
+ * construct_domN() is called with 0 for all three.
+ * NOTE(review): "next" is never advanced, so every call asks for domain
+ * id 100 -- confirm whether repeated launches are expected to work.
+ * NOTE(review): a domain whose construction fails is not torn down here.
+ */
+int launch_domainN(unsigned long start, unsigned long len,
+                   unsigned long initrd_start, unsigned long initrd_len,
+                   char *cmdline)
+{
+    static int next = 100; // FIXME
+
+    struct domain *d = do_createdomain(next,0);
+    if (!d) {
+        printf("launch_domainN: couldn't create\n");
+        return 1;
+    }
+    if (construct_domN(d, start, len, 0, 0, 0)) {
+        printf("launch_domainN: couldn't construct(id=%d,%lx,%lx)\n",
+               d->id,start,len);
+        return 2;
+    }
+    domain_unpause_by_systemcontroller(d);
+
+    /* Explicit success value: the function previously fell off its end. */
+    return 0;
}
void machine_restart(char * __unused)